load packages

library(knitr)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.1.0     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(here)
## here() starts at C:/Users/edsqu/Documents/R/Portfolio

additional packages being used

library(tmap)
library(sf)
library(tidycensus)

load testing data

df1=readxl::read_excel(here("data","tot_tests_neighborhood.xlsx")) %>% 
  clean_names() %>%
  #mutate(date_reported= as_datetime(date_reported))
  separate(date_reported,into=c("date","hr"),sep = " ")%>%
  mutate(date_report=as_date(date))%>%
  group_by(neighborhood)%>%
  mutate(across(total_tests, ~ .-c(0,lag(.)[-1])))

Load Mapping Data

neigh=st_read(here("data","dc_neigh.geojson")) %>% clean_names()
## Reading layer `DC_Health_Planning_Neighborhoods' from data source 
##   `C:\Users\edsqu\Documents\R\Portfolio\data\dc_neigh.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 51 features and 6 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: -77.11976 ymin: 38.79165 xmax: -76.9094 ymax: 38.99556
## Geodetic CRS:  WGS 84
class(neigh)
## [1] "sf"         "data.frame"

celect and clean data

df_tests=df1 %>%
  filter(as_date(date_report) == "2021/10/27") %>% 
  separate(neighborhood,into=c("code","name"),sep = ":") %>% 
  mutate(code=case_when(
    code=="N35" ~"N0",
    TRUE ~ code
  ))

join mapping data with test data

neigh2=left_join(neigh,df_tests,by=c("code")) 
tmap_mode("view")
tm_shape(neigh2) +tm_polygons("total_tests",alpha=.5)

load positive cases data

df_c=readxl::read_excel(here("data","neigh_cases.xlsx"),
                            col_types = c("date", "text", "numeric")) %>% 
  clean_names() 
df_cases=df_c %>%
  filter(as_date(date) == "2021/10/27") %>% 
  separate(neighborhood,into=c("code","name"),sep = ":") %>%
  mutate(code=case_when(
    code=="N35" ~"N0",
    TRUE ~ code
  ))

Join covid data

neigh3=left_join(neigh2,df_cases,by=c("code")) 
tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(neigh3) +tm_polygons("total_positives",alpha=.5)

graph data

 tm_shape(neigh3)+tm_polygons(c("total_positives","total_tests"),alpha=.5)

get census Data

census_api_key("2c8b9d5c4902b7efb4e1f98b2c23692cb1b73e95")
## To install your API key for use in future sessions, run this function with `install = TRUE`.
v20 = load_variables(2018,"acs5")
df_cencus=get_acs(geography = "tract",
                  variables=c("pop"="B01001_001"),
                  state="DC",geometry=TRUE,year=2018) 
## Getting data from the 2014-2018 5-year ACS
## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |======================================================================| 100%
df_cens=df_cencus %>% select(-moe) %>% spread(variable,estimate) 
df_cens_adj=df_cens %>% st_transform(4326)

Join Census data to mapping data

df_j=st_join(df_cens_adj,neigh3,largest=TRUE)
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries

Summarize population for each neighbourhood

df3=df_j %>% select(pop,code) %>%
  group_by(code) %>%
  summarise(pop_n=sum(pop))

Join population data and generate results

Removed any extreme outliers and generated map to compare testing rates and covid rates

df4=left_join(neigh3,df3 %>% st_set_geometry(NULL))
## Joining, by = "code"
df4=df4 %>% mutate( covid_rate=total_positives/pop_n, test_rate=total_tests/pop_n)
df4 %>% filter(!(code %in% c("N0","N15","N24"))) %>% tm_shape()+tm_polygons(c("covid_rate","test_rate"))